# -*- coding: utf-8 -*- #
# frozen_string_literal: true

require 'open-uri'

# Index page of the Apache httpd documentation downloads. The task reads this
# page, and the archive file name found in it is appended to this URL.
APACHE_DOCS_URI = "https://downloads.apache.org/httpd/docs/"
# Path the generated keywords file is written to by the task.
APACHE_KEYWORDS_FILE = "./lib/rouge/lexers/apache/keywords.rb"

namespace :builtins do
  # Regenerates APACHE_KEYWORDS_FILE from the documentation archive linked on
  # the APACHE_DOCS_URI index page.
  task :apache do
    generator = Rouge::Tasks::Builtins::Apache.new

    index = URI.open(APACHE_DOCS_URI, &:read)
    files = generator.download_docs(index)

    # The directive index is processed separately from the per-module pages,
    # so take it out of the hash. Fail with a clear message when it is absent
    # instead of passing nil on to the keyword extraction.
    list = files.delete("directives.html") do |missing|
      raise "#{missing} was not found in the downloaded Apache docs"
    end
    mods = files.values

    keywords = generator.extract_keywords(list)
    values   = generator.extract_values(mods)

    File.write(APACHE_KEYWORDS_FILE, generator.render_output(keywords, values))
  end
end

module Rouge
  module Tasks
    module Builtins
      # Builds the source of the generated Apache keywords file from the
      # Apache httpd HTML documentation.
      #
      # The steps are: #download_docs fetches and unpacks the documentation
      # archive, #extract_keywords and #extract_values scrape the HTML pages,
      # and #render_output turns the results into Ruby source text.
      class Apache
        # Scratch directory the archive is downloaded to and unpacked in.
        WORK_DIR = "/tmp/rouge"

        # Link to the English documentation archive on the index page.
        # Group 1 is the archive name without ".zip", group 2 the extension.
        ARCHIVE_LINK = /href="([^"]+\.en)(\.zip)"/.freeze

        # One entry of the directive index page. The +open+/+close+ groups
        # capture the escaped angle brackets that surround section names.
        INDEX_ENTRY = %r(<li><a.*?>(?<open>&lt;)?(?<name>.*?)(?<close>&gt;)?</a></li>).freeze

        # A candidate is kept as a value only when the whole string is a
        # single word-like token of at least two characters.
        VALUE_TOKEN = /\A\w[\w*-]+\z/.freeze

        # Downloads the documentation archive referenced by the index page
        # and reads the relevant HTML pages from its "mod" directory.
        #
        # Requires the external commands +mkdir+, +wget+ and +unzip+.
        #
        # @param input [String] HTML of the documentation download index
        # @return [Hash{String => String}] page contents keyed by file name
        # @raise [ArgumentError] if the index contains no archive link
        # @raise [RuntimeError] if one of the external commands fails
        def download_docs(input)
          link = input.match(ARCHIVE_LINK)
          raise ArgumentError, "no English docs archive link found in the index page" unless link

          name, ext = link.captures
          archive   = name + ext
          docs_zip  = APACHE_DOCS_URI + archive

          files = {}

          # The archive name comes from downloaded HTML, so every command is
          # run without a shell (one argument per word) and must succeed.
          system("mkdir", "-p", WORK_DIR, exception: true)
          Dir.chdir(WORK_DIR) do
            # -O overwrites an archive left behind by a previous run; without
            # it wget would save to "<archive>.1" and the old file would be
            # unpacked again.
            system("wget", "-q", "-O", archive, docs_zip, exception: true)
            system("unzip", "-oq", archive, exception: true)

            Dir.chdir("./#{name}/mod/") do
              Dir.glob("./{directives,core,mod_*,mpm*}.html").each do |path|
                files[File.basename(path)] = File.read(path)
              end
            end
          end

          files
        end

        # Collects directive and section names from the directive index page.
        #
        # Names wrapped in escaped angle brackets (+&lt;Name&gt;+) are filed
        # under "sections", all others under "directives". Names are
        # downcased; entries with an empty link text are skipped.
        #
        # @param input [String] HTML of the directive index page
        # @return [Hash{String => Array<String>}] names grouped by kind; a
        #   kind that never occurs has no key
        def extract_keywords(input)
          keywords = Hash.new { |hash, key| hash[key] = [] }

          input.each_line do |line|
            entry = INDEX_ENTRY.match(line.scrub)
            next unless entry
            next if entry[:name].empty?

            key = (entry[:open] && entry[:close]) ? "sections" : "directives"
            keywords[key] << entry[:name].downcase
          end

          keywords
        end

        # Collects candidate argument values from module documentation pages.
        #
        # Two places are scanned on every page: the words following the
        # directive name in each "Syntax:" code sample, and the leading text
        # of each +<dt>+ element.
        #
        # @param inputs [Enumerable<String>] HTML of the module pages
        # @return [Array<String>] sorted, unique, downcased values
        def extract_values(inputs)
          values = Set.new

          inputs.each do |input|
            html = input.scrub

            html.scan(%r[Syntax:.*?<code>(.*?)</code>]m) do |captures|
              # The first word is the directive name itself, not a value.
              collect_values(values, captures.first.split(/[\s|]/).drop(1))
            end

            html.scan(%r[<dt>\s*(?:<.+?>\s*)*(.*?)<]m) do |captures|
              collect_values(values, captures.first.split(/[=\[\]]/))
            end
          end

          values.to_a.sort
        end

        # Renders the Ruby source of the generated keywords file.
        #
        # With a block, each line of the file is yielded in order. Without a
        # block, the lines are joined with newlines and returned as a String
        # (no trailing newline).
        #
        # @param keywords [Hash{String => Array<String>}] one class method is
        #   emitted per key
        # @param values [Array<String>] contents of the +values+ class method
        # @return [String] the rendered source when no block is given
        def render_output(keywords, values, &b)
          return enum_for(:render_output, keywords, values).to_a.join("\n") unless b

          yield   "# -*- coding: utf-8 -*- #"
          yield   "# frozen_string_literal: true"
          yield   ""
          yield   "# DO NOT EDIT"
          yield   "# This file is automatically generated by `rake builtins:apache`."
          yield   "# See tasks/builtins/apache.rake for more info."
          yield   ""
          yield   "module Rouge"
          yield   "  module Lexers"
          yield   "    class Apache"
          keywords.each do |k, v|
            yield "      def self.#{k}"
            yield "        @#{k} ||= Set.new #{v.inspect}"
            yield "      end"
            yield ""
          end
          yield   "      def self.values"
          yield   "        @values ||= Set.new #{values.inspect}"
          yield   "      end"
          yield   "    end"
          yield   "  end"
          yield   "end"
        end

        private

        # Adds the downcased form of every candidate that is a single clean
        # token to +values+. Whole-string anchors keep multi-line fragments
        # out even when one of their lines looks like a token.
        def collect_values(values, candidates)
          candidates.each do |candidate|
            values.add(candidate.downcase) if candidate.match?(VALUE_TOKEN)
          end
        end
      end
    end
  end
end
